## Code to create graphs and tables
## to address 2nd round rnr concerns for POQ
##
## Jen Wu, Jan 2023


# Initial settings --------------------------------------------------------
pacman::p_load(tidyverse, xtable, viridis,
               estimatr, stargazer, sandwich,
               lmtest, margins)
theme_set(theme_bw())
my_stars <- c(0.05, 0.01, 0.001)


# Load in data ------------------------------------------------------------

df_all <- read_rds("jen_data/cleaned/df_all")


# Editor concern 1, R1 concern 1 -----------------------------------------------

# Subset on Blacks who identify as Democrats (leave out Republicans) and show 
# how ideological self-identification (from most liberal to most conservative) 
# explains social and economic policy preferences, respectively, in this group, 
# while including relevant controls.


lm_econp <- lm(econ_policy.index~ 
                 r_ideology.center +
                 r_age + r_gender + r_education + r_income +
                 factor(year), df_all,
               subset = df_all$r_race == "Black" & 
                 df_all$r_party_name == "Democrat",
               weights = df_all$weight)
lm_socp <- lm(social_policy.index~
                r_ideology.center +
                r_age + r_gender + r_education + r_income +
                factor(year), df_all,
              subset = df_all$r_race == "Black" & 
                df_all$r_party_name == "Democrat",
              weights = df_all$weight)


lm_econp_var <- lm(econ_policy.index~ 
                     r_ideology.center +
                     morality.index.center + religiosity.index.center +
                     r_age + r_gender + r_education + r_income +
                     factor(year), df_all,
                   subset = df_all$r_race == "Black" & 
                     df_all$r_party_name == "Democrat",
                   weights = df_all$weight)
lm_socp_var <- lm(social_policy.index~
                    r_ideology.center +
                    morality.index.center + religiosity.index.center +
                    r_age + r_gender + r_education + r_income +
                    factor(year), df_all,
                  subset = df_all$r_race == "Black" & 
                    df_all$r_party_name == "Democrat",
                  weights = df_all$weight)

summary(lm_econp)
summary(lm_socp)

summary(lm_econp_var)
summary(lm_socp_var)


se_policy_list <- list(coeftest(lm_econp, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_socp, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_econp_var, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_socp_var, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2])


policy_covlabs <- c("Ideology (Conservative)", 
                    "Morality Index",
                    "Religiosity Index")


stargazer(lm_econp, lm_socp, lm_econp_var,lm_socp_var, 
          se = se_policy_list,
          type = "html",
          dep.var.caption = "Policy preferences",
          covariate.labels = policy_covlabs,
          dep.var.labels = c("Econ.", "Social", "Econ.", "Social"),
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Policy preferences and ideology among Black Democrats",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/policy_regs_bdems.html")

# now for whites
lm_econpw <- lm(econ_policy.index~ 
                 r_ideology.center +
                 r_age + r_gender + r_education + r_income +
                 factor(year), df_all,
               subset = df_all$r_race == "White" & 
                 df_all$r_party_name == "Democrat",
               weights = df_all$weight)
lm_socpw <- lm(social_policy.index~
                r_ideology.center +
                r_age + r_gender + r_education + r_income +
                factor(year), df_all,
              subset = df_all$r_race == "White" & 
                df_all$r_party_name == "Democrat",
              weights = df_all$weight)


lm_econp_varw <- lm(econ_policy.index~ 
                     r_ideology.center +
                     morality.index.center + religiosity.index.center +
                     r_age + r_gender + r_education + r_income +
                     factor(year), df_all,
                   subset = df_all$r_race == "White" & 
                     df_all$r_party_name == "Democrat",
                   weights = df_all$weight)
lm_socp_varw <- lm(social_policy.index~
                    r_ideology.center +
                    morality.index.center + religiosity.index.center +
                    r_age + r_gender + r_education + r_income +
                    factor(year), df_all,
                  subset = df_all$r_race == "White" & 
                    df_all$r_party_name == "Democrat",
                  weights = df_all$weight)

summary(lm_econpw)
summary(lm_socpw)

summary(lm_econp_varw)
summary(lm_socp_varw)


se_policy_listw <- list(coeftest(lm_econpw, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_socpw, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_econp_varw, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_socp_varw, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2])


policy_covlabs <- c("Ideology (Conservative)", 
                    "Morality Index",
                    "Religiosity Index")


stargazer(lm_econpw, lm_socpw, lm_econp_varw,lm_socp_varw, 
          se = se_policy_listw,
          type = "html",
          dep.var.caption = "Policy preferences",
          covariate.labels = policy_covlabs,
          dep.var.labels = c("Econ.", "Social", "Econ.", "Social"),
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
            title = "Policy preferences and ideology among White Democrats",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/policy_regs_wdems.html")

# Subset on Blacks who identify as Democrats (leave out Republicans) and show 
# how ideological self-identification (from most liberal to most conservative) 
# explains social and economic policy preferences, respectively, in this group, 
# while including relevant controls.
lm_econp2 <- lm(econ_policy.index~ 
                 r_ideology.center*ideo_awareness_scaled.center +
                 r_age + r_gender + r_education + r_income +
                 factor(year), df_all,
               subset = df_all$r_race == "Black" & 
                 df_all$r_party_name == "Democrat",
               weights = df_all$weight)
lm_socp2 <- lm(social_policy.index~
                 r_ideology.center*ideo_awareness_scaled.center +
                r_age + r_gender + r_education + r_income +
                factor(year), df_all,
              subset = df_all$r_race == "Black" & 
                df_all$r_party_name == "Democrat",
              weights = df_all$weight)


lm_econp_var2 <- lm(econ_policy.index~ 
                      r_ideology.center*ideo_awareness_scaled.center +
                     morality.index.center + religiosity.index.center +
                     r_age + r_gender + r_education + r_income +
                     factor(year), df_all,
                   subset = df_all$r_race == "Black" & 
                     df_all$r_party_name == "Democrat",
                   weights = df_all$weight)
lm_socp_var2 <- lm(social_policy.index~
                     r_ideology.center*ideo_awareness_scaled.center +
                    morality.index.center + religiosity.index.center +
                    r_age + r_gender + r_education + r_income +
                    factor(year), df_all,
                  subset = df_all$r_race == "Black" & 
                    df_all$r_party_name == "Democrat",
                  weights = df_all$weight)

summary(lm_econp2)
summary(lm_socp2)

summary(lm_econp_var2)
summary(lm_socp_var2)


se_policy_list2 <- list(coeftest(lm_econp2, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_socp2, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_econp_var2, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2],
                       coeftest(lm_socp_var2, vcov = vcovCL, 
                                cluster = ~year, type = "HC1")[,2])


policy_covlabs2 <- c("Ideology (Conservative)", 
                     "Liberal-Conservative Familiarity Score",
                    "Morality Index",
                    "Religiosity Index",
                    "Ideology x Familiarity Score")


stargazer(lm_econp2, lm_socp2, lm_econp_var2,lm_socp_var2, 
          se = se_policy_list2,
          type = "html",
          dep.var.caption = "Policy preferences",
          covariate.labels = policy_covlabs2,
          dep.var.labels = c("Econ.", "Social", "Econ.", "Social"),
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Policy preferences, ideology, and familiarity score among Black Democrats",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/policy_regs_bdems_ideofam.html")

# for whites
lm_econp2w <- lm(econ_policy.index~ 
                  r_ideology.center*ideo_awareness_scaled.center +
                  r_age + r_gender + r_education + r_income +
                  factor(year), df_all,
                subset = df_all$r_race == "White" & 
                  df_all$r_party_name == "Democrat",
                weights = df_all$weight)
lm_socp2w <- lm(social_policy.index~
                 r_ideology.center*ideo_awareness_scaled.center +
                 r_age + r_gender + r_education + r_income +
                 factor(year), df_all,
               subset = df_all$r_race == "White" & 
                 df_all$r_party_name == "Democrat",
               weights = df_all$weight)


lm_econp_var2w <- lm(econ_policy.index~ 
                      r_ideology.center*ideo_awareness_scaled.center +
                      morality.index.center + religiosity.index.center +
                      r_age + r_gender + r_education + r_income +
                      factor(year), df_all,
                    subset = df_all$r_race == "White" & 
                      df_all$r_party_name == "Democrat",
                    weights = df_all$weight)
lm_socp_var2w <- lm(social_policy.index~
                     r_ideology.center*ideo_awareness_scaled.center +
                     morality.index.center + religiosity.index.center +
                     r_age + r_gender + r_education + r_income +
                     factor(year), df_all,
                   subset = df_all$r_race == "White" & 
                     df_all$r_party_name == "Democrat",
                   weights = df_all$weight)

summary(lm_econp2w)
summary(lm_socp2w)

summary(lm_econp_var2w)
summary(lm_socp_var2w)


se_policy_list2w <- list(coeftest(lm_econp2w, vcov = vcovCL, 
                                 cluster = ~year, type = "HC1")[,2],
                        coeftest(lm_socp2w, vcov = vcovCL, 
                                 cluster = ~year, type = "HC1")[,2],
                        coeftest(lm_econp_var2w, vcov = vcovCL, 
                                 cluster = ~year, type = "HC1")[,2],
                        coeftest(lm_socp_var2w, vcov = vcovCL, 
                                 cluster = ~year, type = "HC1")[,2])


policy_covlabs2 <- c("Ideology (Conservative)", 
                     "Liberal-Conservative Familiarity Score",
                     "Morality Index",
                     "Religiosity Index",
                     "Ideology x Familiarity Score")


stargazer(lm_econp2w, lm_socp2w, lm_econp_var2w,lm_socp_var2w, 
          se = se_policy_list2w,
          type = "html",
          dep.var.caption = "Policy preferences",
          covariate.labels = policy_covlabs2,
          dep.var.labels = c("Econ.", "Social", "Econ.", "Social"),
          omit.stat = c("rsq", "f", "ser"),
          star.cutoffs = my_stars,
          title = "Policy preferences, ideology, and familiarity score among White Democrats",
          omit = c("r_age",
                   "r_income",
                   "r_education",
                   "r_gender",
                   "year"),
          out = "jen_output/tables/policy_regs_wdems_ideofam.html")


# Editor concern 4, R2 concern --------------------------------------------
# pool data across time to get correlation
# and familiarity score dist

# get correlation coef for Blacks and whites
df_all %>% 
  group_by(r_race) %>% 
  summarise(ideo_party_cor = cor(r_ideology, r_party, 
                                 use = "pairwise.complete.obs"),
            obs = n()) %>% 
  filter(r_race == "White" | r_race == "Black") %>% 
  xtable(digits = c(0,0,3,3),
         caption = "Ideology and party correlation 1972-2016") %>% 
  print(include.rownames= FALSE, type = "html", caption.placement="top") %>% 
  cat(file = "jen_output/tables/overall_ideopar_cor_tab.html")


# same but 1984-2016
df_all %>% 
  filter(year >=1984) %>% 
  group_by(r_race) %>% 
  summarise(ideo_party_cor = cor(r_ideology, r_party, 
                                 use = "pairwise.complete.obs"),
            obs = n()) %>% 
  filter(r_race == "White" | r_race == "Black") %>% 
  xtable(digits = c(0,0,3,3),
         caption = "Ideology and party correlation 1984-2016") %>% 
  print(include.rownames= FALSE, type = "html", caption.placement="top") %>% 
  cat(file = "jen_output/tables/overall_ideopar_cor_tab1984.html")


# lib familiarity score, hist
g_famscores_hist_pooled <- df_all %>% 
  filter(!is.na(ideo_awareness), 
         r_race == "White" | r_race == "Black") %>% 
  group_by(year, r_race, ideo_awareness) %>% 
  tally() %>% 
  mutate(total = sum(n)) %>% 
  mutate(prop = n/total) %>% 
  ungroup() %>% 
  group_by(ideo_awareness, r_race) %>% 
  summarise(avg_prop = mean(prop)) %>% 
  ungroup() %>% 
  ggplot() +
  geom_col(aes(x = ideo_awareness, y = avg_prop,
               fill = r_race), 
           position = position_dodge(), color = "black") + 
  labs(fill = "Race", x = "Liberal-Conservative Familiarity (0-lowest, 5 highest)",
       y = "Proportion", title = "Mean proportion of scores over 1972-2016") +
  scale_x_continuous(breaks = seq(0,5)) +
  scale_fill_manual(values=c("white", "black"))

g_famscores_hist_pooled
ggsave(g_famscores_hist_pooled, 
       filename = "jen_output/figures/famscores_hist_pooled.pdf",
       height = 4, width = 6)

# scaled and centered lib-cons familiarity score, density
g_famscores_dens_pooled <- df_all %>% 
  filter(!is.na(ideo_awareness_scaled.center), 
         r_race == "White" | r_race == "Black") %>% 
  ggplot() +
  geom_density(aes(x = ideo_awareness_scaled.center, fill = r_race, 
                   colour = r_race), alpha = .2) + 
  labs(fill = "Race", color = "Race",
       x = "Liberal-Conservative Familiarity Score, Scaled and Centered",
       title = "Denisty of Scores (Pooled 1972-2016)") +
  scale_fill_manual(values=c("#6699CC", "gray")) +
  scale_color_manual(values=c("#6699CC", "black"))
g_famscores_dens_pooled

ggsave(g_famscores_dens_pooled, 
       filename = "jen_output/figures/famscores_dens_pooled.pdf",
       height = 4, width = 6)

# scaled lib-conservative familiarty score, hist
g_famscores_hist_pooled_scaled <-df_all %>% 
  filter(!is.na(ideo_awareness_scaled), 
         r_race == "White" | r_race == "Black") %>% 
  group_by(year, r_race, ideo_awareness_scaled) %>% 
  tally() %>% 
  mutate(total = sum(n)) %>% 
  mutate(prop = n/total) %>% 
  ungroup() %>% 
  group_by(ideo_awareness_scaled, r_race) %>% 
  summarise(avg_prop = mean(prop)) %>% 
  ungroup() %>%  
  ggplot() +
  geom_col(aes(x = ideo_awareness_scaled, y = avg_prop,
               fill = r_race), 
           position = position_dodge(), color = "black") + 
  labs(fill = "Race", x = "Scaled Liberal-Conservative Familiarity Score (0-lowest, 1-highest)",
       y = "Proportion", title = "Mean proportion 1972-2016") +
  scale_x_continuous(breaks = seq(0,5)) +``
scale_fill_manual(values=c("white", "black"))

ggsave(g_famscores_hist_pooled_scaled, 
       filename = "jen_output/figures/famscores_scaled_hist_pooled.pdf",
       height = 4, width = 6)



# Editor concern 5 --------------------------------------------------------
# predicted party id for lowest and highest end of familiarity scores

# among conservatives using regs from table 1 
# main table with year fixed effects (covers years 2012, 2016)
lm_partyideo_rc <- lm(
  r_party ~ 
    ideo_awareness_scaled.center*r_ideology.center +
    race_of_interviewer*r_ideology.center +
    racial_consciousness.center*r_ideology.center +
    religiosity.index +
    morality.index +
    r_age +
    r_income +
    r_gender +
    r_education +
    factor(year),
  data = df_all,
  subset = df_all$r_race == "Black",
  weights = weight)

summary(lm_partyideo_rc)


# main table but no racial consciousness with year fixed effects
# (covers years... )
lm_partyideo <- lm(
  r_party ~ 
    ideo_awareness_scaled.center*r_ideology.center +
    race_of_interviewer*r_ideology.center +
    religiosity.index +
    morality.index +
    r_age +
    r_income +
    r_gender +
    r_education +
    factor(year),
  data = df_all,
  subset = df_all$r_race == "Black",
  weights = weight)

summary(lm_partyideo)


# no race of interviewer
lm_partyideo_nri <- lm(
  r_party ~ 
    ideo_awareness_scaled.center*r_ideology.center +
    religiosity.index +
    morality.index +
    r_age +
    r_income +
    r_gender +
    r_education +
    factor(year),
  data = df_all,
  subset = df_all$r_race == "Black",
  weights = weight)

summary(lm_partyideo_nri)

se_main_list <- list(coeftest(lm_partyideo_nri, vcov = vcovCL, 
                              cluster = ~year, type = "HC1")[,2],
                     coeftest(lm_partyideo, vcov = vcovCL, 
                              cluster = ~year, type = "HC1")[,2],
                     coeftest(lm_partyideo_rc, vcov = vcovCL, 
                              cluster = ~year, type = "HC1")[,2])

start <- Sys.time()
xvals_oi <- df_all %>% 
  filter(r_race == "Black", !is.na(r_ideology.center),
         !is.na(ideo_awareness_scaled.center),
         !is.na(religiosity.index), !is.na(morality.index),
        !is.na(r_age), !is.na(r_income), !is.na(r_gender), 
        !is.na(r_education)) %>% 
  summarise(xmin = min(ideo_awareness_scaled.center),
            xmed = median(ideo_awareness_scaled.center),
            xmax = max(ideo_awareness_scaled.center)) %>% 
  t()


ideo_libcon_me_nri <- cplot(lm_partyideo_nri,
                            xvals = xvals_oi[,1],
                            x = "ideo_awareness_scaled.center", 
                            dx = "r_ideology.center", what = "effect",
                            vcov = vcovCL(lm_partyideo_nri, cluster = ~year, type = "HC1"),
                            draw = FALSE)
print(Sys.time()-start)

fam_type <- c("Lowest", "Median", "Highest")
cbind(fam_type, ideo_libcon_me_nri) %>% 
  select(-factor) %>% 
  xtable(digits = c(0,0, rep(3, 4)),
         caption = "Average Marginal Effect of Ideology (Conservative) on Party (Republican) by familiarity score") %>% 
  print(include.rownames= FALSE, type = "html", caption.placement="top") %>% 
  cat(file = "jen_output/tables/ideo_libcon_me1.html")


# correlation bt ideology and party for high and low familiarity
df_all %>% 
  filter(!is.na(ideo_awareness_scaled.center),
         r_race == "White" | r_race == "Black") %>% 
  group_by(year, r_race) %>% 
  mutate(fam_top = max(ideo_awareness_scaled.center),
         fam_med = median(ideo_awareness_scaled.center)) %>% 
  ungroup() %>% 
  mutate(across(starts_with("fam"), 
                ~ifelse(ideo_awareness_scaled.center >= ., 1, 0))) %>% 
  pivot_longer(starts_with("fam"), names_to = "type",
               values_to = "group") %>% 
  group_by(r_race, group, type) %>% 
  summarise(ideo_party_cor = cor(r_ideology.center, r_party, 
                                 use = "pairwise.complete.obs")) %>% 
  ungroup() %>% 
  arrange(r_race, type, group) %>% 
  mutate(type = case_when(type == "fam_med" & group == 1 ~ ">= median",
                          type == "fam_med" & group == 0 ~ "< median",
                          type == "fam_top" & group == 1 ~ "Only top",
                          type == "fam_top" & group == 0 ~ "Less than top"
                          
         )) %>% 
  select(r_race, type, ideo_party_cor) %>% 
  pivot_wider(names_from = type,  values_from = ideo_party_cor) %>% 
  xtable(digits = c(0, 0, rep(4,4)),
         caption = "Ideology and party correlation by race and liberal-conservative familiarity score grouping within race") %>% 
  print(include.rownames= FALSE,  caption.placement="top", type = "html") %>% 
  cat(file = "jen_output/tables/ideo_party_cor_by_famscore.html")


g_ideo_par_cor_fam <- df_all %>% 
  filter(!is.na(ideo_awareness_scaled.center),
         r_race == "White" | r_race == "Black") %>% 
  group_by(year, r_race) %>% 
  mutate(fam_top = max(ideo_awareness_scaled.center),
         fam_med = median(ideo_awareness_scaled.center)) %>% 
  ungroup() %>% 
  mutate(across(starts_with("fam"), 
                ~ifelse(ideo_awareness_scaled.center >= ., 1, 0))) %>% 
  pivot_longer(starts_with("fam"), names_to = "type",
               values_to = "group") %>% 
  group_by(year, r_race, group, type) %>% 
  summarise(ideo_party_cor = cor(r_ideology.center, r_party, 
                                 use = "pairwise.complete.obs")) %>% 
  ungroup() %>% 
  mutate(type = case_when(type == "fam_med" & group == 1 ~ ">= median",
                          type == "fam_med" & group == 0 ~ "< median",
                          type == "fam_top" & group == 1 ~ "Only top",
                          type == "fam_top" & group == 0 ~ "Less than top"
                          
  )) %>% 
  pivot_wider(names_from = "r_race", values_from = "ideo_party_cor") %>% 
  ggplot(aes(x = year)) +
  geom_point(aes(y = White)) +
  geom_smooth(aes(y = White), color = "black", se = FALSE) +
  geom_point(aes(y = Black), color= "red", shape=21) +
  geom_smooth(aes(y = Black), color = "red", linetype = "dashed", se = FALSE) +
  labs(x = "Year", y = "Party and ideology correlation") +
  facet_wrap(~type)
g_ideo_par_cor_fam

ggsave(g_ideo_par_cor_fam, filename = "jen_output/figures/ideo_par_cor_fam.pdf",
       height = 6, width = 8)
